
***************************************************************************************************************
/*
THIS DO-FILE: creates linked parent-kid datasets
*/
***************************************************************************************************************

* Session setup: start from an empty Stata session, raise the variable limit to
* 20000 for the wide files loaded below, and stop output from pausing at -more-.
clear all
set maxvar 20000
set more off

***************************************************************************************************************

* Build full_sample.dta: parent records (unique on hhidpn, per the 1:m merge)
* linked to every matching row of the kids-and-helpers file.
* keep(1 3) retains unmatched parents plus all matched rows and drops rows that
* appear only in the using file.
use "parent_sample.dta", clear
merge 1:m hhidpn using "kidsandhelpers_sample.dta", keep(1 3)

*child selection criteria
tab selection_criterion
tab selection_criterion if nmaxchild0!=1

tab selectedkid
* Stata treats missing as larger than any number, so "helphours_overall>0" on its
* own would also count selected kids whose hours are missing. Exclude them, as the
* kidcaregiver definition below already does.
count if selectedkid==1 & helphours_overall>0 & helphours_overall<.

*multiple kid stuff
* firstobs flags one row per parent so parent-level tabulations count each parent once
bys hhidpn: gen firstobs=1 if _n==1
* kidinsample: row has kinw==1 in at least one of waves 4-12
gen kidinsample=1 if (kinw4==1 | kinw5==1 | kinw6==1 | kinw7==1 | kinw8==1 | kinw9==1 | kinw10==1 | kinw11==1 | kinw12==1)
* egen total() is the documented name of the legacy sum(); missing values count as 0
bys hhidpn: egen numkids=total(kidinsample)
* numkidscat: numkids with everything above 5 folded into category 5
gen numkidscat=numkids
replace numkidscat=5 if numkids>5 & numkids!=.
* kidcaregiver: 1 if positive overall help hours, 0 if zero, missing if hours are missing
gen kidcaregiver=(helphours_overall>0) if helphours_overall!=.
bys hhidpn: egen numkidcaregiver=total(kidcaregiver)
bys hhidpn: egen totalhours=total(helphours_overall)
bys hhidpn: egen maxhours=max(helphours_overall)
* share of the parent's total hours supplied by the row with the most hours
* (missing when totalhours is 0)
gen hours_maxcaregiver=maxhours/totalhours

*% of sample:
tab numkidscat if firstobs==1 & numkidcaregiver>=1 
*tab numkidscat if firstobs==1 & numkidcaregiver>=1, sum(numkidcaregiver)
*% only 1 helper
tab numkidcaregiver numkidscat if firstobs==1 & numkidcaregiver>=1, col
*% hours supplied by main
tab numkidscat if firstobs==1 & numkidcaregiver>=1, sum(hours_maxcaregiver)
*% hours supplied by main if >1 caregiver
tab numkidscat if firstobs==1 & numkidcaregiver>=2, sum(hours_maxcaregiver)

save "full_sample.dta", replace //full_sample.dta contains parents+kids+helpers (so potentially multiple obs per parent)
******************************************************************************************

******************************************************************************************
*PREPARE THE LONG DATA
* Build full_sample_long.dta: reshape the linked file to one row per
* (hhidpn, kidid, wave), drop waves 1-3, and derive care-need and helper variables.
use "full_sample.dta", clear

* The keep list uses kabyear* rather than bare kabyear: agegap below reads
* kabyearbg by its full name, and a bare "kabyear" only retains it through
* variable abbreviation (which fails under "set varabbrev off" or if a variable
* named exactly kabyear exists).
keep hhidpn rabyear inw* age* assliv* nh* assets* income* inc_ssret* inc_pen* ltci* medicaid* widowed* nevermarried* avinc412 PIpctile cohort female raevbrn ///
	 kidid selectedkid selection_criterion kabyear* kfemale kmale keduc* kwill* kanyxfertokid* kamtxfertokid* kanyxferfromkid* kidhelper kidnonhelper nonkidhelper ///
	 rec_inf* rec_form* hrs_inf* hrs_form* hrs_total* helper_hpm* helphours_overall helper_spouse* helper_child* helper_othfam* helper_nonfam* ///
	 kinw* kidnear* kidworkNT* kidworkPT* kidworkFT* grandkidcare* kmstat* kkids* kage* kownhome* kresd* kinc*
*reshape it first so person-year observations
reshape long inw age income inc_ssret inc_pen assets assliv nh ltci medicaid widowed nevermarried ///
			 kinw kidnear kidworkNT kidworkPT kidworkFT grandkidcare kmstat kkids kage kownhome kresd kinc kwill kanyxfertokid kamtxfertokid kanyxferfromkid ///
			 helper_hpm rec_inf rec_form helper_spouse helper_child helper_othfam helper_nonfam hrs_inf hrs_form hrs_total ///
			 , i(hhidpn kidid) j(wave)
drop if inlist(wave,1,2,3)

*other variables
* kmarried: 1 if kmstat is 1 or 2, 0 if kmstat is 0 or 3, missing otherwise
gen kmarried=(inlist(kmstat,1,2)) if inlist(kmstat,0,1,2,3)
* coreside: 1 if kresd is 1 or 2, 0 if kresd is 0, missing otherwise
gen coreside=inlist(kresd,1,2) if inlist(kresd,0,1,2)

*HEALTH STATUS
replace rec_inf=0  if rec_inf==.  & inw==1  //those who aren't sick have missings
replace rec_form=0 if rec_form==. & inw==1  //those who aren't sick have missings

replace rec_inf=0 if inlist(hrs_inf,0,.)   & inw==1 //these have a informal helper but logged 0 hrs in last month
replace rec_form=0 if inlist(hrs_form,0,.) & inw==1 //these have a formal helper but logged 0 hrs in last month

*overwrite health status with assliv and nursing home status
* nursing home takes precedence over assisted living; either one counts as formal care
replace assliv=0 if nh==1
replace rec_form=1 if assliv==1 | nh==1

gen rec_either=(rec_form==1 | rec_inf==1) if inw==1
gen rec_both  =(rec_form==1 & rec_inf==1) if inw==1

* careneed: 0 = not in wave, 1 = no care needs, 2 = light, 3 = heavy.
* Statement order matters: level 3 overrides level 2, and inw==0 overrides both.
* NOTE(review): rows with inw missing (if any exist) keep the default value 1 - confirm.
gen 	careneed=1 //no care needs
* light: 0 < hrs_total <= 100, or in assisted living. The earlier note said "20 hours",
* but the coded cutoff is 100 (experiment with cutoff later somehow).
replace careneed=2 if rec_either==1 & ((hrs_total>0 & hrs_total<=100) | assliv==1)
* heavy: hrs_total above 100 (missing excluded explicitly), or in a nursing home
replace careneed=3 if rec_either==1 & ((hrs_total>100 & hrs_total!=.) | nh==1)
replace careneed=0 if inw==0 //dead

gen sick=(inlist(careneed,2,3)) if inlist(careneed,1,2,3)
* informaldummy : informal care and not in assisted living or a nursing home
* informaldummy2: informal care and no formal care
gen informaldummy =(rec_inf==1 & assliv!=1 & nh!=1) if rec_inf!=.
gen informaldummy2=(rec_inf==1 & rec_form!=1)       if rec_inf!=.

*selected vs nonselected kid
gen agegap=kabyearbg-rabyear

* helper hours with missing recoded to 0 for rows with kinw==1, then divided by 4
* (presumably hours per month -> approximate hours per week; confirm units)
gen helper_hpmwithzero=helper_hpm
replace helper_hpmwithzero=0 if kinw==1 & helper_hpm==.
gen helper_hpwwithzero=helper_hpmwithzero/4
tab selectedkid if kinw==1 & sick==1, sum(helper_hpwwithzero)
sum helper_hpwwithzero if selectedkid==1 & helphours_overall>0 & helphours_overall<. & sick==1
sum helper_hpwwithzero if selectedkid==1 & helphours_overall<. & sick==1, det

* kincover35k: 1 if kinc is 3, 4 or 5; 0 for other values in 1-7; missing otherwise
gen kincover35k=inlist(kinc,3,4,5) if inrange(kinc,1,7)

save "full_sample_long.dta", replace
******************************************************************************************

******************************************************************************************
*PREPARE MY SAMPLE
* Build my_sample_long.dta: restrict the long file to the selected kid of each
* parent, print descriptive statistics, and add the Social Security income share.
use "full_sample_long.dta", clear

* Rows for the selected kid still carry the parent-level care totals:
* non-family care (rec_form, hrs_form), family care (rec_inf, hrs_inf) and
* all care (hrs_total). Comparing hrs_inf with helper_hpm shows how far the
* kid's own hours are from total informal hours.
keep if selectedkid==1

*summary statistics (before taking out cohort 6)
summarize female widowed medicaid ltci if inw==1
foreach v in age raevbrn {
	summarize `v' if inw==1, detail
}

tabulate careneed if inw==1
bysort careneed: summarize informaldummy
* informal hours among informal-care recipients, by care-need level (2, then 3)
forvalues lvl = 2/3 {
	summarize hrs_inf if careneed==`lvl' & informaldummy==1, detail
}

*for a footnote
summarize rec_inf if sick==1 & informaldummy==0
summarize helper_hpwwithzero if rec_inf==1 & sick==1 & informaldummy==0, detail
summarize nh if rec_inf==1 & sick==1 & informaldummy==0
summarize rec_form if sick==1 & informaldummy==1
summarize helper_hpwwithzero if rec_form==1 & sick==1 & informaldummy==1, detail


foreach v in income assets {
	summarize `v' if inw==1, detail
}
count if inw==1 & wave==4

* inc_ssret as a fraction of income (missing when income is 0 or missing)
generate inc_fracSS=inc_ssret/income

save "my_sample_long.dta", replace
******************************************************************************************

